import requests
from lxml import etree
import re
import os
import pandas as pd

# Scrape one newxiuren.com index page: for every linked detail page, collect
# the page title, the Baidu-pan download URL and its extraction password,
# then save everything to an Excel file under ./xiuren/.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0',
}
url_list = []
# Ensure the output directory exists; exist_ok avoids the check-then-create race.
os.makedirs('xiuren', exist_ok=True)
url = input("输入网址：")
# Example input: 'http://www.newxiuren.com/xiuren10200.aspx'
# xr_id becomes e.g. 'xiuren10200' — used to name the output workbook.
xr_id = url.split('/')[-1].split('.')[0]
print(xr_id)
# timeout prevents the script from hanging forever on a dead connection.
response = requests.get(url, headers=headers, timeout=15)
print('开始解析...')
html = etree.HTML(response.text)
# Relative hrefs to each detail page, e.g. 'piclist.aspx?id=xiuren202510102'.
c1 = html.xpath('//*[@id="content"]/div[*]/a/@href')
print('开始爬取...')
for i in c1:
    print('-----------分割线------------')
    url_i = 'http://www.newxiuren.com/' + i
    print(url_i)
    response_i = requests.get(url_i, headers=headers, timeout=15)
    html_i = etree.HTML(response_i.text)

    # Page title of the detail page.
    title = html_i.xpath('/html/head/title/text()')[0]
    print(title)

    # Baidu-pan URL is embedded in an onclick="...('http://pan...')" attribute.
    # NOTE: the element id "downlaod" is misspelled on the remote site itself —
    # it must stay exactly as-is in the xpath.
    download = html_i.xpath('//*[@id="downlaod"]/div[1]/@onclick')[0]
    pan_url = re.findall(r'\'(.*?)\'', download)[0]
    print(pan_url)

    # Extraction password for the pan link.
    value = html_i.xpath('//*[@id="copy-content"]/@value')[0]
    print(value)

    # BUGFIX: row order now matches the Excel header
    # ['标题', '链接', '网盘下载链接', '密码'] = (title, link, pan url, password).
    # The original appended [url_i, title, ...], which put the link in the
    # 标题 column and the title in the 链接 column.
    url_list.append([title, url_i, pan_url, value])
data = pd.DataFrame(url_list)
data.to_excel(f'xiuren/xiuren{xr_id}.xlsx', index=False, header=['标题', '链接', '网盘下载链接', '密码'])
print('-----------分割线------------')





